In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
In [2]:
# Load the world-population table into `data`.
# NOTE(review): the area/density headers are decoded as "Kmý" (visible in the
# info() output). That looks like a "Km²" byte (0xFD in the DOS code pages
# cp437/cp850) being read as cp1252 — confirm the file's real encoding.
data = pd.read_csv("World_Population_Data.csv", encoding='cp1252')

Check the dataset and data types¶

In [3]:
# Inspect column names, non-null counts and dtypes right after loading;
# most numeric-looking columns arrive as strings (dtype `object`).
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Index                    234 non-null    int64  
 1   Country (or dependency)  234 non-null    object 
 2   Continent                234 non-null    object 
 3   Population (2023)        234 non-null    object 
 4   Yearly Change            234 non-null    object 
 5   Net Change               234 non-null    object 
 6   Density (P/Kmý)          234 non-null    object 
 7   Land Area (Kmý)          234 non-null    object 
 8   Migrants (net)           234 non-null    object 
 9   Fert.  Rate              233 non-null    float64
 10  Med. Age                 233 non-null    float64
 11  Urban Pop %              234 non-null    object 
 12  World Share              234 non-null    object 
dtypes: float64(2), int64(1), object(10)
memory usage: 23.9+ KB
In [4]:
# Preview the frame; pandas abbreviates it to the first and last rows.
data
Out[4]:
Index Country (or dependency) Continent Population (2023) Yearly Change Net Change Density (P/Kmý) Land Area (Kmý) Migrants (net) Fert. Rate Med. Age Urban Pop % World Share
0 1 India Asia 1,428,627,663 0.81 % 11,454,490 481 2,973,190 -486,136 2.0 28.0 36 % 17.76 %
1 2 China Asia 1,425,671,352 -0.02 % -215,985 152 9,388,211 -310,220 1.2 39.0 65 % 17.72 %
2 3 United States America 339,996,563 0.50 % 1,706,706 37 9,147,420 999,700 1.7 38.0 83 % 4.23 %
3 4 Indonesia Asia 277,534,122 0.74 % 2,032,783 153 1,811,570 -49,997 2.1 30.0 59 % 3.45 %
4 5 Pakistan Asia 240,485,658 1.98 % 4,660,796 312 770,880 -165,988 3.3 21.0 35 % 2.99 %
... ... ... ... ... ... ... ... ... ... ... ... ... ...
229 230 Montserrat America 4,386 -0.09 % -4 44 100 0 1.6 44.0 11 % 0.00 %
230 231 Falkland Islands America 3,791 0.29 % 11 0 12,170 0 1.6 40.0 62 % 0.00 %
231 232 Niue Oceania 1,935 0.05 % 1 7 260 0 2.4 36.0 41 % 0.00 %
232 233 Tokelau Oceania 1,893 1.18 % 22 189 10 0 2.6 27.0 0 % 0.00 %
233 234 Holy See Europe 518 1.57 % 8 1,295 0 0 NaN NaN N.A. 0.00 %

234 rows × 13 columns

Fix the population and world share data types¶

In [5]:
# Strip the thousands separators so the column can be parsed as a number.
# The result is assigned back to the column rather than using `inplace=True`
# on the `data[...]` selection: with pandas Copy-on-Write that selection is a
# temporary object, so an in-place replace on it would leave `data` untouched
# (pandas 2.2 already warns about this pattern).
data["Population (2023)"] = data["Population (2023)"].replace(',', '', regex=True)
In [6]:
# Drop the trailing " %" so the share can be parsed as a float.
# Assigned back instead of `inplace=True` on the `data[...]` selection, which
# operates on a temporary object under pandas Copy-on-Write and would not
# update `data`.
data["World Share"] = data["World Share"].replace(' %', '', regex=True)
In [7]:
# Parse the cleaned population strings into numbers.
data["Population (2023)"] = pd.to_numeric(data["Population (2023)"])
In [8]:
# Parse the cleaned percentage strings into numbers (values stay in percent units).
data["World Share"] = pd.to_numeric(data["World Share"])
In [9]:
# Re-check dtypes: "Population (2023)" and "World Share" should now be numeric.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 234 entries, 0 to 233
Data columns (total 13 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   Index                    234 non-null    int64  
 1   Country (or dependency)  234 non-null    object 
 2   Continent                234 non-null    object 
 3   Population (2023)        234 non-null    int64  
 4   Yearly Change            234 non-null    object 
 5   Net Change               234 non-null    object 
 6   Density (P/Kmý)          234 non-null    object 
 7   Land Area (Kmý)          234 non-null    object 
 8   Migrants (net)           234 non-null    object 
 9   Fert.  Rate              233 non-null    float64
 10  Med. Age                 233 non-null    float64
 11  Urban Pop %              234 non-null    object 
 12  World Share              234 non-null    float64
dtypes: float64(3), int64(2), object(8)
memory usage: 23.9+ KB

Total population on our planet¶

In [10]:
# Sum of the population column over every row in the table.
# NOTE(review): the name is misspelled ("Toral" for "Total"); it is kept
# because the next cell reads it under this name.
Toral_population = data['Population (2023)'].sum()
In [11]:
# Report the summed population as a plain integer.
print(f"Total Population in our planet: {Toral_population}")
Total Population in our planet: 8043901603

Total population on our planet by continent¶

In [12]:
# Population summed per continent, ordered from the most to the least populous.
Toral_population_by_continent = (
    data.groupby('Continent')['Population (2023)']
    .sum()
    .reset_index()
    .sort_values(by=['Population (2023)'], ascending=False)
)
In [13]:
# Display the per-continent totals (already sorted in descending order).
Toral_population_by_continent
Out[13]:
Continent Population (2023)
2 Asia 4751819588
0 Africa 1460481772
1 America 1043901526
3 Europe 741693851
4 Oceania 46004866
In [ ]:
 

Visualize population by country¶

In [14]:
# Short aliases for the columns that the treemap cells pass to plotly.
Country, Population, Percentage, Continent = (
    data[column_name]
    for column_name in (
        "Country (or dependency)",
        "Population (2023)",
        "World Share",
        "Continent",
    )
)
In [15]:
from kaleido.scopes.plotly import PlotlyScope
In [16]:
import plotly.io as pio
# Use a static PNG as the default renderer for figures shown without an
# explicit renderer argument. The plotting cells in this notebook call
# `fig.show("notebook")`, which names its renderer explicitly.
# NOTE(review): PNG rendering presumably depends on the kaleido package
# imported in the previous cell — confirm.
pio.renderers.default = "png"
In [ ]:
 
In [18]:
# Treemap with one tile per country under a single root node:
# tile area follows population, tile colour follows world share.
# Each tile shows its label, its value and its percentage of the parent;
# the colour bar is hidden.
fig = (
    px.treemap(
        data_frame=data,
        path=[px.Constant("World Population by Country"), Country],
        values=Population,
        color=Percentage,
        color_continuous_scale=["#E4F1FF", "#0174BE", "#0174BE", "#4477CE"],
    )
    .update_traces(textinfo="label+value+percent parent")
    .update_layout(margin=dict(t=30, l=30, r=20, b=20))
    .update_coloraxes(showscale=False)
)

fig.show("notebook")

Visualize by Continent¶

In [19]:
# Two-level treemap: root -> continent -> country.
# Tile area follows population and tile colour follows world share.
# Hover is switched off and the colour bar is hidden, so each tile only
# shows its label and its percentage of the parent tile.
# NOTE(review): the colour series is named "World Share", so the
# 'Percentage' key in `labels` probably matches nothing — confirm.
fig = (
    px.treemap(
        data_frame=data,
        path=[px.Constant("World Population by Continent"), Continent, Country],
        values=Population,
        color=Percentage,
        color_continuous_scale=["#D8E9F0", "#0174BE", "#0174BE", "#4477CE"],
        branchvalues='total',
        labels={'Percentage': 'World Share'},
    )
    .update_traces(
        textinfo="label+percent parent",
        hovertemplate=None,
        hoverinfo="skip",
    )
    .update_layout(
        margin=dict(t=30, l=30, r=20, b=20),
        font=dict(family="verdana", size=14),
    )
    .update_coloraxes(showscale=False)
)

fig.show("notebook")

Grouping data by continent and world share percentage¶

I will use this to see what share of the total world population each continent accounts for.
In [20]:
# World share (in percent) summed per continent.
# The per-country shares are already rounded to two decimals in the table,
# so these sums carry a little rounding error.
world_share_group = (
    data.groupby('Continent')["World Share"]
    .sum()
    .reset_index()
)
In [21]:
# Display the per-continent world share table.
world_share_group
Out[21]:
Continent World Share
0 Africa 18.17
1 America 12.97
2 Asia 59.08
3 Europe 9.24
4 Oceania 0.55
In [22]:
# Confirm the grouped frame has one row per continent and a numeric share column.
world_share_group.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 2 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   Continent    5 non-null      object 
 1   World Share  5 non-null      float64
dtypes: float64(1), object(1)
memory usage: 208.0+ bytes

Import Bubble Chart from local file¶

In [23]:
from packed_bubbles import BubbleChart
from matplotlib import rcParams
In [24]:
# Continent names and their summed world share, in the same row order,
# as consumed by the bubble-chart cell.
unique_continent, world_share_by_continent = (
    world_share_group["Continent"],
    world_share_group["World Share"],
)

Create bubble chart¶

In [25]:
# Packed-bubble chart of each continent's share of the world population.
#
# Matplotlib reads rc settings when a text artist is created, so the font size
# and text colour are configured *before* anything is drawn. Setting them at
# the end of the cell only took effect the second time the cell was run.
font = {'size': 14}
plt.rc('font', **font)

COLOR = 'white'
plt.rcParams['text.color'] = COLOR

# One colour per continent, in the row order of `world_share_by_continent`.
color_continuous_scale = ["#5DAE8B", '#71a9f7', "#1450A3", "#FFD00C", "#FF9C6D"]

bubble_chart = BubbleChart(area=world_share_by_continent,
                           bubble_spacing=0.5)
bubble_chart.collapse()

fig, ax = plt.subplots(subplot_kw=dict(aspect="equal"))

# Bubble labels are the rounded shares; entries of 1 % or less are masked.
bubble_labels = np.ma.masked_where(
    world_share_by_continent <= 1, world_share_by_continent.round(2)
)
bubble_chart.plot(ax, bubble_labels, color_continuous_scale)

# Legend and title sit outside the bubbles, so they are forced to black
# instead of inheriting the white default text colour set above.
legend = ax.legend(unique_continent, loc='upper center', bbox_to_anchor=(0.5, -0.1), ncol=5, frameon=False)
for text in legend.get_texts():
    text.set_color("black")

ax.axis("off")
ax.relim()
ax.autoscale_view()
ax.set_title('"World population cover by each continent"', loc='center', color="black")

plt.show()
In [26]:
# Bar chart of the per-continent world share, tallest bar first.
world_share_group_sorted = world_share_group.sort_values(by="World Share", ascending=False)

# The rounded share is used both as the bar height and as the bar label.
rounded_share = world_share_group_sorted["World Share"].round(2)

continent_bars = go.Bar(
    x=world_share_group_sorted["Continent"],
    y=rounded_share,
    text=rounded_share,
    textposition='auto',
    marker=dict(color='#4477CE'),
)

fig = go.Figure(data=[continent_bars])

# Transparent plot and paper backgrounds.
fig.update_layout(
    title=("World population cover by each continent (Bar chart)"),
    plot_bgcolor='rgba(0,0,0,0)',
    paper_bgcolor='rgba(0,0,0,0)',
)

fig.show("notebook")
In [ ]: